Phonetics Tools in R - Supplementary code

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.7
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(PraatR)
library(rPraat)
library(phonR)
library(dygraphs)
library(phonTools)
## 
## Attaching package: 'phonTools'
## The following object is masked from 'package:dplyr':
## 
##     slice
library(audio)
## 
## Attaching package: 'audio'
## The following object is masked from 'package:phonTools':
## 
##     play
library(tuneR)
## 
## Attaching package: 'tuneR'
## The following object is masked from 'package:audio':
## 
##     play
## The following objects are masked from 'package:phonTools':
## 
##     normalize, play
library(gss)
absdir = "/Users/goldrch2/Desktop/Research/FemaleBP/"
mydir = list.files(absdir, "*.WAV")
mydir
##  [1] "BP17-L-babado.WAV"  "BP17-L-cabido.WAV"  "BP17-L-tributo.WAV"
##  [4] "BP18-L-babado.WAV"  "BP18-L-cabido.WAV"  "BP18-L-tributo.WAV"
##  [7] "BP19-L-babado.WAV"  "BP19-L-cabido.WAV"  "BP19-L-tributo.WAV"
## [10] "BP20-L-babado.WAV"  "BP20-L-cabido.WAV"  "BP20-L-tributo.WAV"
## [13] "BP21-L-babado.WAV"  "BP21-L-cabido.WAV"  "BP21-L-tributo.WAV"
testwav = paste0(absdir, mydir[1])
testwav
## [1] "/Users/goldrch2/Desktop/Research/FemaleBP/BP17-L-babado.WAV"
testtext = str_replace(testwav, ".WAV", ".TextGrid")
testformant = str_replace(testwav, ".WAV", ".Formant")
testpitch = str_replace(testwav, ".WAV", ".Pitch")
testpitchtier = str_replace(testwav, ".WAV", ".PitchTier")
testint = str_replace(testwav, ".WAV", ".Intensity")
testinttier = str_replace(testwav, ".WAV", ".IntensityTier")
testtable = str_replace(testwav, ".WAV", ".Table")
praat( "To Pitch...", arguments=list( 0.001, 75,350), input=testwav, output=testpitch, overwrite=TRUE )
praat( "Down to PitchTier", input=testpitch, output=testpitchtier, overwrite=TRUE, filetype="headerless spreadsheet" )

praat( "To Intensity...", arguments = list(100, 0), input=testwav, output=testint, overwrite=TRUE )
praat( "Down to IntensityTier", input=testint, output=testinttier, overwrite=TRUE, filetype="text" )

praat( "To Formant (burg)...", arguments = list(0, 5, 5500, 0.025, 50), input=testwav, output=testformant, overwrite=TRUE )
form.test = formant.read(testformant)
tg.test = tg.read(testtext)
pt.test = pt.read(testpitchtier)
it.test = it.read(testinttier)

tg.testmini = tg.cut0(tg.test, tStart = 0, tEnd = 5)
pt.testmini = pt.cut0(pt.test, tStart = 0, tEnd = 5)
it.testmini = it.cut0(it.test, tStart = 0, tEnd = 5)
mylength = length(form.test$t[form.test$t<=5])
form.testmini = form.test
form.testmini$t = form.testmini$t[1:mylength]
form.testmini$xmax = form.testmini$t[mylength]
form.testmini$frame = form.test$frame[1:mylength] 
form.testmini$nx = mylength

formlist = data.frame(form.test$t, do.call("rbind", form.test$frame)) %>% mutate(frequency = str_replace_all(frequency, "(^[^[:digit:]]+|\\)$)", ""), bandwidth = str_replace_all(bandwidth, "(^[^[:digit:]]+|\\)$)", "")) %>% separate(frequency, into =c("f1", "f2", "f3", "f4", "f5"), sep = ",") %>% separate(bandwidth, into =c("f1_bw", "f2_bw", "f3_bw", "f4_bw", "f5_bw"), sep = ",") %>% mutate_at(.vars = c(4:13), .funs  = "as.numeric")
#tg.plot(tg.test, group = "testplot")
#pt.plot(pt.test, group = "testplot")
#it.plot(it.test, group = "testplot")
# formant.plot(form.test, group = "testplot", scaleIntensity = TRUE, drawBandwidth = TRUE)
# tg.plot(tg.testmini, formant = form.testmini)
# tg.plot(tg.testmini, pt = pt.testmini, it = it.testmini)

tg.plot(tg.testmini, group = "testplot2")
pt.plot(pt.testmini, group = "testplot2")
it.plot(it.testmini, group = "testplot2")
formant.plot(form.testmini, group = "testplot2", scaleIntensity = TRUE, drawBandwidth = TRUE)
tg.plot(tg.testmini, formant = form.testmini)
tg.plot(tg.testmini, pt = pt.testmini, it = it.testmini)
#plot sound
sndWav <- readWave(testwav)
fs <- sndWav@samp.rate
snd <- sndWav@left / (2^(sndWav@bit-1))
t <- seqM(0, (length(snd)-1)/fs, by = 1/fs)

snddf = data.frame(t, snd) %>% filter(t <=5)
dygraph(snddf, xlab = "Time (sec)", group = "testplot")%>% dyRangeSelector(height = 20)
tg.plot(tg.testmini, formant = form.testmini, group = "testplot")

Getting information from two tiers

Sometimes you will want to include information from two tiers - for example, if you have a phone tier and a syllable tier, you might want to know what syllable each phone is in. I give an example using the data.table package here. The exmaple is given using the sample data that comes from the rPraat package, sicne my dissertation data does not include two annotated tiers.

library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
#get sample data
tgtg = tg.sample()
ptsamp = pt.sample()

#create data frames for phone and syllable tiers
tg1 = data.frame(t11 =tgtg$phone$t1, t21 = tgtg$phone$t2, label1 = tgtg$phone$label)
tg2 = data.frame(t12 =tgtg$syllable$t1, t22 = tgtg$syllable$t2, label2= tgtg$syllable$label)

#this is getting five points of f0 throughout each phoneme. 
#first, get times for calculating f0 and saving them into the tg data frame
tg1= tg1 %>% mutate(RepNo = as.numeric(as.factor(t11))) %>% group_by(t11) %>% 
  mutate(normtime = list(seq(0.1,1.0,.2)),
         acttimenorm= list(seq(from = t11, to = t21, by = (t21-t11)/4))) %>%
  unnest() %>% mutate(f0=0)

#now extracting f0 from the pitch tier data
for (i in 1:length(tg1$t11)){
  mycurpoint = which.min(abs(tg1$acttimenorm[i] - ptsamp$t))
  tg1$f0[i] = ptsamp$f[mycurpoint]
}

head(tg1)
## # A tibble: 6 x 7
## # Groups:   t11 [2]
##      t11    t21 label1 RepNo normtime acttimenorm    f0
##    <dbl>  <dbl> <fct>  <dbl>    <dbl>       <dbl> <dbl>
## 1 0.008  0.0966 ""         1      0.1      0.008   210.
## 2 0.008  0.0966 ""         1      0.3      0.0301  210.
## 3 0.008  0.0966 ""         1      0.5      0.0523  210.
## 4 0.008  0.0966 ""         1      0.7      0.0744  210.
## 5 0.008  0.0966 ""         1      0.9      0.0966  210.
## 6 0.0966 0.145  j          2      0.1      0.0966  210.
#create data tables and set keys
tg1 = data.table(tg1)
tg2 = data.table(tg2)
setkey(tg1)
setkey(tg2)
#combine syllable level with phone level tgs
tgall = foverlaps(tg1, tg2, by.x  = c("t11", "t21"), by.y = c("t12", "t22"), type = "within")